[Autogluon] X범주를 다 넣어버렸음;;

Author

김보람

Published

January 23, 2024

# Gather the one-row evaluation frames of the individual experiments and
# display them stacked as a single summary table.
# NOTE(review): only result1 and result2 are produced in the cells visible
# here; result3..result13 are presumably built the same way further down.
lst = [result1,result2,result3,result4,result5,result6,result7,result8,result9,result10,result11,result12,result13]
pd.concat(lst)
accuracy_score precision_score recall_score f1_score roc_auc_score
0 0.996170 0.987583 0.993339 0.990452 0.995109
0 0.994172 0.986770 0.993892 0.990318 0.994092
0 0.994006 0.994979 0.990008 0.992487 0.993339
0 0.987788 0.973559 0.996530 0.984911 0.989246
0 0.989787 0.974545 0.991858 0.983125 0.990379
0 0.992895 0.987328 0.995003 0.991150 0.993247
0 0.981682 0.916031 1.000000 0.956175 0.988554
0 0.985012 0.956444 0.995375 0.975521 0.987973
0 0.989176 0.980137 0.993060 0.986556 0.989823
0 0.990841 0.986784 0.995003 0.990876 0.990842
0 0.964369 0.848656 1.000000 0.918133 0.977736
0 0.978022 0.931677 1.000000 0.964630 0.984308
0 0.974359 0.946741 0.991674 0.968686 0.977246

imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import networkx as nx
import sklearn
import xgboost as xgb

# sklearn
from sklearn import model_selection # split함수이용
from sklearn import ensemble # RF,GBM
from sklearn import metrics
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

# gnn
import torch
import torch.nn.functional as F
import torch_geometric
from torch_geometric.nn import GCNConv

# autogluon
from autogluon.tabular import TabularDataset, TabularPredictor
/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/torch_geometric/typing.py:18: UserWarning: An issue occurred while importing 'pyg-lib'. Disabling its usage. Stacktrace: /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/libpyg.so: undefined symbol: _ZN2at4_ops12split_Tensor4callERKNS_6TensorEN3c106SymIntEl
  warnings.warn(f"An issue occurred while importing 'pyg-lib'. "
/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/torch_geometric/typing.py:31: UserWarning: An issue occurred while importing 'torch-scatter'. Disabling its usage. Stacktrace: /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/torch_scatter/_scatter_cuda.so: undefined symbol: _ZNK3c107SymBool10guard_boolEPKcl
  warnings.warn(f"An issue occurred while importing 'torch-scatter'. "
/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/torch_geometric/typing.py:42: UserWarning: An issue occurred while importing 'torch-sparse'. Disabling its usage. Stacktrace: /home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/torch_sparse/_diag_cuda.so: undefined symbol: _ZN3c106detail19maybe_wrap_dim_slowIlEET_S2_S2_b
  warnings.warn(f"An issue occurred while importing 'torch-sparse'. "
    def throw(df, fraud_rate):
        """Down-sample the non-fraud rows so frauds make up about ``fraud_rate`` of the result.

        Every row with ``is_fraud == 1`` is kept. Rows with ``is_fraud == 0``
        are sampled without replacement, with a fixed seed of 42, at the
        fraction that brings the fraud share of the output to ``fraud_rate``.

        Args:
            df: DataFrame with an ``is_fraud`` column holding 0/1 labels.
            fraud_rate: Target share of fraud rows in the returned frame.

        Returns:
            DataFrame holding all fraud rows followed by the sampled normal
            rows; the original index labels are preserved.
        """
        labels = df['is_fraud']
        fraud_rows = df[labels == 1].copy()
        normal_rows = df[labels == 0].copy()
        # Solve n_fraud / (n_fraud + keep * n_normal) = fraud_rate for keep.
        keep_fraction = (len(fraud_rows) * (1 - fraud_rate)) / (len(normal_rows) * fraud_rate)
        sampled_normal = normal_rows.sample(frac=keep_fraction, random_state=42)
        return pd.concat([fraud_rows, sampled_normal])
    
    def split_dataframe(data_frame, test_fraud_rate, test_rate=0.3, random_state=None):
        """Split a frame into train/test parts with a chosen fraud share in the test part.

        The test part holds ``int(len(data_frame) * test_rate)`` rows, of which
        ``int(test_fraud_rate * len(data_frame) * test_rate)`` are drawn from the
        fraud rows and the rest from the normal rows. Every row whose index
        label was not drawn goes into the train part.

        Args:
            data_frame: DataFrame with an ``is_fraud`` column holding 0/1 labels.
            test_fraud_rate: Share of fraud rows wanted in the test part.
            test_rate: Share of all rows that go into the test part.
            random_state: Seed forwarded to both ``DataFrame.sample`` calls so
                the split can be reproduced. The default ``None`` keeps the
                previous unseeded behaviour.

        Returns:
            Tuple ``(train_data, test_data)``.

        Raises:
            ValueError: Raised by ``DataFrame.sample`` when a class has fewer
                rows than the number requested from it.
        """
        n = len(data_frame)

        # Size of the test part and how it is divided between the two classes.
        n_test = int(n * test_rate)
        n_test_fraud = int(test_fraud_rate * (n * test_rate))
        n_test_normal = n_test - n_test_fraud

        # Draw the test rows separately from each class, without replacement.
        is_fraud = data_frame['is_fraud']
        test_fraud_data = data_frame[is_fraud == 1].sample(
            n=n_test_fraud, replace=False, random_state=random_state)
        test_normal_data = data_frame[is_fraud == 0].sample(
            n=n_test_normal, replace=False, random_state=random_state)
        test_data = pd.concat([test_normal_data, test_fraud_data])

        # Train part: rows whose index label does not appear in the test part.
        train_data = data_frame[~data_frame.index.isin(test_data.index)]

        return train_data, test_data
    
    def concat(df_tr, df_tst):
        """Stack the train and test frames and build a boolean mask for each part.

        Args:
            df_tr: Training rows; they come first in the stacked frame.
            df_tst: Test rows; they come after the training rows.

        Returns:
            Tuple ``(df, (train_mask, test_mask))`` where ``df`` is the stacked
            frame and each mask is a boolean array with one entry per row of
            ``df``, marking the rows that came from the respective input.
        """
        n_tr, n_tst = len(df_tr), len(df_tst)
        # Masks are positional rather than label based (the original author's
        # note suggests this is meant to avoid index mix-ups after stacking).
        train_mask = np.repeat([True, False], [n_tr, n_tst])
        test_mask = np.logical_not(train_mask)
        return pd.concat([df_tr, df_tst]), (train_mask, test_mask)
        
    def evaluation(y, yhat):
        """Score predictions with five scikit-learn metrics and return them as one row.

        Args:
            y: True labels.
            yhat: Predicted values, passed unchanged to every metric.

        Returns:
            One-row DataFrame with a column per metric, named after the metric
            function (``accuracy_score``, ``precision_score``, ``recall_score``,
            ``f1_score``, ``roc_auc_score``); values are rounded to 6 decimals.
        """
        # NOTE(review): roc_auc_score also receives ``yhat``; if callers pass
        # hard class labels rather than scores, the AUC reflects only that
        # single operating point -- confirm this is intended.
        scorers = (
            sklearn.metrics.accuracy_score,
            sklearn.metrics.precision_score,
            sklearn.metrics.recall_score,
            sklearn.metrics.f1_score,
            sklearn.metrics.roc_auc_score,
        )
        # Built-in round() works whether a scorer returns a NumPy scalar or a
        # plain Python float; a plain float has no .round() method.
        return pd.DataFrame({scorer.__name__: [round(scorer(y, yhat), 6)] for scorer in scorers})
        
    def compute_time_difference(group):
        """List the absolute time gap, in seconds, for every ordered pair of rows.

        Args:
            group: DataFrame with a ``trans_date_trans_time`` column of
                timestamps.

        Returns:
            List of ``[label_i, label_j, seconds]`` entries, one per ordered
            pair of rows (each row paired with itself included), where the
            labels are the rows' index labels. The order is row-major: all
            pairs for the first row, then all pairs for the second, and so on.
        """
        # Pull labels and timestamps out once instead of rebuilding a row
        # Series with ``iloc`` on every step of the quadratic loop.
        labels = list(group.index)
        stamps = list(group['trans_date_trans_time'])
        return [
            [label_i, label_j, abs((stamp_i - stamp_j).total_seconds())]
            for label_i, stamp_i in zip(labels, stamps)
            for label_j, stamp_j in zip(labels, stamps)
        ]

    def edge_index_save(df, unique_col, theta, gamma):
        """Build the edge index for rows sharing ``unique_col`` and save the raw time gaps.

        Rows are grouped by ``unique_col``; within each group every ordered
        pair of rows gets the weight ``exp(-dt / theta)``, where ``dt`` is the
        pair's time gap in seconds. Pairs whose weight evaluates to exactly 1
        (for example a zero time gap, which includes each row paired with
        itself) get weight 0. Pairs with weight greater than ``gamma`` become
        edges.

        The unweighted ``[label_i, label_j, dt]`` array is written to
        ``edge_index_attempt<k>_<col>.npy`` in the working directory, using the
        smallest ``k`` (counting from 0) for which no such file exists yet.

        Args:
            df: DataFrame with a ``trans_date_trans_time`` column and numeric
                index labels.
            unique_col: Column, or list of columns, to group by.
            theta: Scale of the exponential decay.
            gamma: Weight threshold an edge must exceed.

        Returns:
            ``torch.long`` tensor of shape ``(2, num_edges)`` holding the index
            labels of the connected rows.
        """
        import os  # local import: the file's import cell does not bring in os

        rows = [row for _, group in df.groupby(unique_col) for row in compute_time_difference(group)]
        # reshape keeps the array two-dimensional even when there are no rows.
        pairs = np.array(rows, dtype=np.float64).reshape(-1, 3)

        # NOTE(review): the original read ``self.save_attempt``, which does not
        # exist in this function (NameError); the counter is now local and
        # starts at 0 on every call -- confirm that numbering is acceptable.
        tag = str(unique_col).replace(' ', '').replace('_', '')
        save_attempt = 0
        filename = f"edge_index_attempt{save_attempt}_{tag}.npy"
        while os.path.exists(filename):
            save_attempt += 1
            filename = f"edge_index_attempt{save_attempt}_{tag}.npy"
        np.save(filename, pairs)

        # Turn time gaps into weights; a weight of exactly 1 is replaced by 0.
        weights = np.exp(-pairs[:, 2] / theta)
        pairs[:, 2] = np.where(weights != 1, weights, 0.0)

        kept = pairs[pairs[:, 2] > gamma, :2]
        return torch.tensor(kept.astype(np.int64), dtype=torch.long).t()
    
    def edge_index(df, unique_col, theta, gamma):
        """Build the edge index connecting rows that share ``unique_col`` and are close in time.

        Rows are grouped by ``unique_col``; within each group every ordered
        pair of rows gets the weight ``exp(-dt / theta)``, where ``dt`` is the
        pair's time gap in seconds. Pairs whose weight evaluates to exactly 1
        (for example a zero time gap, which includes each row paired with
        itself) get weight 0. Pairs with weight greater than ``gamma`` become
        edges.

        Args:
            df: DataFrame with a ``trans_date_trans_time`` column and numeric
                index labels.
            unique_col: Column, or list of columns, to group by.
            theta: Scale of the exponential decay.
            gamma: Weight threshold an edge must exceed.

        Returns:
            ``torch.long`` tensor of shape ``(2, num_edges)`` holding the index
            labels of the connected rows; the shape is ``(2, 0)`` when no pair
            passes the threshold.
        """
        rows = [row for _, group in df.groupby(unique_col) for row in compute_time_difference(group)]
        # reshape keeps the array two-dimensional even when there are no rows.
        pairs = np.array(rows, dtype=np.float64).reshape(-1, 3)

        # Turn time gaps into weights; a weight of exactly 1 is replaced by 0.
        weights = np.exp(-pairs[:, 2] / theta)
        pairs[:, 2] = np.where(weights != 1, weights, 0.0)

        # Boolean indexing replaces the per-row Python loop.
        kept = pairs[pairs[:, 2] > gamma, :2]
        return torch.tensor(kept.astype(np.int64), dtype=torch.long).t()
# Load the transactions (dropping the first CSV column) and parse the timestamp
# column with a single vectorised call instead of one pd.to_datetime per row.
fraudTrain = pd.read_csv("~/Desktop/fraudTrain.csv").iloc[:,1:]
fraudTrain = fraudTrain.assign(trans_date_trans_time=pd.to_datetime(fraudTrain.trans_date_trans_time))
fraudTrain
trans_date_trans_time cc_num merchant category amt first last gender street city ... lat long city_pop job dob trans_num unix_time merch_lat merch_long is_fraud
0 2019-01-01 00:00:00 2.703190e+15 fraud_Rippin, Kub and Mann misc_net 4.97 Jennifer Banks F 561 Perry Cove Moravian Falls ... 36.0788 -81.1781 3495 Psychologist, counselling 1988-03-09 0b242abb623afc578575680df30655b9 1325376018 36.011293 -82.048315 0
1 2019-01-01 00:00:00 6.304230e+11 fraud_Heller, Gutmann and Zieme grocery_pos 107.23 Stephanie Gill F 43039 Riley Greens Suite 393 Orient ... 48.8878 -118.2105 149 Special educational needs teacher 1978-06-21 1f76529f8574734946361c461b024d99 1325376044 49.159047 -118.186462 0
2 2019-01-01 00:00:00 3.885950e+13 fraud_Lind-Buckridge entertainment 220.11 Edward Sanchez M 594 White Dale Suite 530 Malad City ... 42.1808 -112.2620 4154 Nature conservation officer 1962-01-19 a1a22d70485983eac12b5b88dad1cf95 1325376051 43.150704 -112.154481 0
3 2019-01-01 00:01:00 3.534090e+15 fraud_Kutch, Hermiston and Farrell gas_transport 45.00 Jeremy White M 9443 Cynthia Court Apt. 038 Boulder ... 46.2306 -112.1138 1939 Patent attorney 1967-01-12 6b849c168bdad6f867558c3793159a81 1325376076 47.034331 -112.561071 0
4 2019-01-01 00:03:00 3.755340e+14 fraud_Keeling-Crist misc_pos 41.96 Tyler Garcia M 408 Bradley Rest Doe Hill ... 38.4207 -79.4629 99 Dance movement psychotherapist 1986-03-28 a41d7549acf90789359a9aa5346dcb46 1325376186 38.674999 -78.632459 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1048570 2020-03-10 16:07:00 6.011980e+15 fraud_Fadel Inc health_fitness 77.00 Haley Wagner F 05561 Farrell Crescent Annapolis ... 39.0305 -76.5515 92106 Accountant, chartered certified 1943-05-28 45ecd198c65e81e597db22e8d2ef7361 1362931649 38.779464 -76.317042 0
1048571 2020-03-10 16:07:00 4.839040e+15 fraud_Cremin, Hamill and Reichel misc_pos 116.94 Meredith Campbell F 043 Hanson Turnpike Hedrick ... 41.1826 -92.3097 1583 Geochemist 1999-06-28 c00ce51c6ebb7657474a77b9e0b51f34 1362931670 41.400318 -92.726724 0
1048572 2020-03-10 16:08:00 5.718440e+11 fraud_O'Connell, Botsford and Hand home 21.27 Susan Mills F 005 Cody Estates Louisville ... 38.2507 -85.7476 736284 Engineering geologist 1952-04-02 17c9dc8b2a6449ca2473726346e58e6c 1362931711 37.293339 -84.798122 0
1048573 2020-03-10 16:08:00 4.646850e+18 fraud_Thompson-Gleason health_fitness 9.52 Julia Bell F 576 House Crossroad West Sayville ... 40.7320 -73.1000 4056 Film/video editor 1990-06-25 5ca650881b48a6a38754f841c23b77ab 1362931718 39.773077 -72.213209 0
1048574 2020-03-10 16:08:00 2.283740e+15 fraud_Buckridge PLC misc_pos 6.81 Shannon Williams F 9345 Spencer Junctions Suite 183 Alpharetta ... 34.0770 -84.3033 165556 Prison officer 1997-12-27 8d0a575fe635bbde12f1a2bffc126731 1362931730 33.601468 -83.891921 0

1048575 rows × 22 columns

# df2, mask = concat(df_tr, df_tst)
# def compute_time_difference2(group):
#     n = len(group)
#     result = []
#     for i in range(n):
#         for j in range(n):
#             time_difference = abs((group.iloc[i].trans_date_trans_time - group.iloc[j].trans_date_trans_time).total_seconds())
#             result.append([group.iloc[i].name, group.iloc[j].name, time_difference])
#     return result
# def edge_index2(df, unique_col, theta, gamma, hms='s'):
#     groups = df.groupby(unique_col)
#     edge_index = np.array([item for sublist in (compute_time_difference2(group) for _, group in groups) for item in sublist])
#     edge_index = edge_index.astype(np.float64)
#     filename = f"edge_index{str(unique_col).replace(' ', '').replace('_', '')}.npy"  # 저장
#     np.save(filename, edge_index)
#     edge_index[:,2] = (np.exp(-edge_index[:,2]/(theta)) != 1)*(np.exp(-edge_index[:,2]/(theta))).tolist()
#     edge_index = torch.tensor([(int(row[0]), int(row[1])) for row in edge_index if row[2] > gamma], dtype=torch.long).t()
#     return edge_index

# edge_index2(df,'cc_num', 8.028000e+04, 0.3) # 시도 1

Autogluon(0.3 / 0.2)

# Experiment 1: down-sample with throw(..., 0.3), split with test_fraud_rate=0.2,
# fit AutoGluon on every column of the train part, then score the test part.
df = throw(fraudTrain, 0.3)
df_tr, df_tst = split_dataframe(df, 0.2)

tr, tst = TabularDataset(df_tr), TabularDataset(df_tst)

predictr = TabularPredictor(label="is_fraud")
predictr.fit(tr, presets='best_quality')

y, yhat = tst['is_fraud'], predictr.predict(tst)
result1 = evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240125_012122/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240125_012122/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   613.64 GB / 982.82 GB (62.4%)
Train Data Rows:    14014
Train Data Columns: 21
Label Column: is_fraud
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [1, 0]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    22736.4 MB
    Train Data (Original)  Memory Usage: 11.91 MB (0.1% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
            Note: Converting 1 features to boolean dtype as they only contain 2 unique values.
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
        Fitting CategoryFeatureGenerator...
            Fitting CategoryMemoryMinimizeFeatureGenerator...
        Fitting DatetimeFeatureGenerator...
        Fitting TextSpecialFeatureGenerator...
            Fitting BinnedFeatureGenerator...
            Fitting DropDuplicatesFeatureGenerator...
        Fitting TextNgramFeatureGenerator...
            Fitting CountVectorizer for text features: ['street']
            CountVectorizer fit with vocabulary size = 2
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Unused Original Features (Count: 1): ['trans_num']
        These features were not used to generate any of the output features. Add a feature generator compatible with these features to utilize them.
        Features can also be unused if they carry very little information, such as being categorical but having almost entirely unique values or being duplicates of other features.
        These features do not need to be present at inference time.
        ('object', []) : 1 | ['trans_num']
    Types of features in original data (raw dtype, special dtypes):
        ('datetime', [])                   : 1 | ['trans_date_trans_time']
        ('float', [])                      : 6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                        : 3 | ['zip', 'city_pop', 'unix_time']
        ('object', [])                     : 8 | ['merchant', 'category', 'first', 'last', 'gender', ...]
        ('object', ['datetime_as_object']) : 1 | ['dob']
        ('object', ['text'])               : 1 | ['street']
    Types of features in processed data (raw dtype, special dtypes):
        ('category', [])                    :  7 | ['merchant', 'category', 'first', 'last', 'city', ...]
        ('category', ['text_as_category'])  :  1 | ['street']
        ('float', [])                       :  6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                         :  3 | ['zip', 'city_pop', 'unix_time']
        ('int', ['binned', 'text_special']) :  8 | ['street.char_count', 'street.word_count', 'street.capital_ratio', 'street.lower_ratio', 'street.digit_ratio', ...]
        ('int', ['bool'])                   :  1 | ['gender']
        ('int', ['datetime_as_int'])        : 10 | ['trans_date_trans_time', 'trans_date_trans_time.year', 'trans_date_trans_time.month', 'trans_date_trans_time.day', 'trans_date_trans_time.dayofweek', ...]
        ('int', ['text_ngram'])             :  1 | ['__nlp__.suite']
    1.0s = Fit runtime
    20 features in original data used to generate 37 features in processed data.
    Train Data (Processed) Memory Usage: 2.49 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 1.03s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36b41353a0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8888   = Validation score   (accuracy)
    0.01s    = Training   runtime
    0.14s    = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a7a2bee0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.9229   = Validation score   (accuracy)
    0.01s    = Training   runtime
    0.14s    = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9707   = Validation score   (accuracy)
    4.22s    = Training   runtime
    0.56s    = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.967    = Validation score   (accuracy)
    5.01s    = Training   runtime
    0.36s    = Validation runtime
Fitting model: RandomForestGini_BAG_L1 ...
    0.9595   = Validation score   (accuracy)
    0.76s    = Training   runtime
    0.29s    = Validation runtime
Fitting model: RandomForestEntr_BAG_L1 ...
    0.9585   = Validation score   (accuracy)
    0.84s    = Training   runtime
    0.28s    = Validation runtime
Fitting model: CatBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9909   = Validation score   (accuracy)
    23.83s   = Training   runtime
    0.14s    = Validation runtime
Fitting model: ExtraTreesGini_BAG_L1 ...
    0.9682   = Validation score   (accuracy)
    0.4s     = Training   runtime
    0.33s    = Validation runtime
Fitting model: ExtraTreesEntr_BAG_L1 ...
    0.9667   = Validation score   (accuracy)
    0.42s    = Training   runtime
    0.32s    = Validation runtime
Fitting model: NeuralNetFastAI_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9465   = Validation score   (accuracy)
    23.2s    = Training   runtime
    0.28s    = Validation runtime
Fitting model: XGBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9749   = Validation score   (accuracy)
    10.11s   = Training   runtime
    0.23s    = Validation runtime
Fitting model: NeuralNetTorch_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9409   = Validation score   (accuracy)
    33.45s   = Training   runtime
    0.2s     = Validation runtime
Fitting model: LightGBMLarge_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9689   = Validation score   (accuracy)
    8.61s    = Training   runtime
    0.4s     = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.9946   = Validation score   (accuracy)
    2.9s     = Training   runtime
    0.02s    = Validation runtime
AutoGluon training complete, total runtime = 124.58s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240125_012122/")
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79fe310>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'

Autogluon(0.3 / 0.3)

# Experiment 2: down-sample with throw(..., 0.3), split with test_fraud_rate=0.3,
# fit AutoGluon on every column of the train part, then score the test part.
df = throw(fraudTrain, 0.3)
df_tr, df_tst = split_dataframe(df, 0.3)

tr, tst = TabularDataset(df_tr), TabularDataset(df_tst)

predictr = TabularPredictor(label="is_fraud")
predictr.fit(tr, presets='best_quality')

y, yhat = tst['is_fraud'], predictr.predict(tst)
result2 = evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240125_012327/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240125_012327/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   613.15 GB / 982.82 GB (62.4%)
Train Data Rows:    14014
Train Data Columns: 21
Label Column: is_fraud
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [1, 0]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    22822.68 MB
    Train Data (Original)  Memory Usage: 11.91 MB (0.1% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
            Note: Converting 1 features to boolean dtype as they only contain 2 unique values.
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
        Fitting CategoryFeatureGenerator...
            Fitting CategoryMemoryMinimizeFeatureGenerator...
        Fitting DatetimeFeatureGenerator...
        Fitting TextSpecialFeatureGenerator...
            Fitting BinnedFeatureGenerator...
            Fitting DropDuplicatesFeatureGenerator...
        Fitting TextNgramFeatureGenerator...
            Fitting CountVectorizer for text features: ['street']
            CountVectorizer fit with vocabulary size = 2
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Unused Original Features (Count: 1): ['trans_num']
        These features were not used to generate any of the output features. Add a feature generator compatible with these features to utilize them.
        Features can also be unused if they carry very little information, such as being categorical but having almost entirely unique values or being duplicates of other features.
        These features do not need to be present at inference time.
        ('object', []) : 1 | ['trans_num']
    Types of features in original data (raw dtype, special dtypes):
        ('datetime', [])                   : 1 | ['trans_date_trans_time']
        ('float', [])                      : 6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                        : 3 | ['zip', 'city_pop', 'unix_time']
        ('object', [])                     : 8 | ['merchant', 'category', 'first', 'last', 'gender', ...]
        ('object', ['datetime_as_object']) : 1 | ['dob']
        ('object', ['text'])               : 1 | ['street']
    Types of features in processed data (raw dtype, special dtypes):
        ('category', [])                    :  7 | ['merchant', 'category', 'first', 'last', 'city', ...]
        ('category', ['text_as_category'])  :  1 | ['street']
        ('float', [])                       :  6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                         :  3 | ['zip', 'city_pop', 'unix_time']
        ('int', ['binned', 'text_special']) :  8 | ['street.char_count', 'street.word_count', 'street.capital_ratio', 'street.lower_ratio', 'street.digit_ratio', ...]
        ('int', ['bool'])                   :  1 | ['gender']
        ('int', ['datetime_as_int'])        : 10 | ['trans_date_trans_time', 'trans_date_trans_time.year', 'trans_date_trans_time.month', 'trans_date_trans_time.day', 'trans_date_trans_time.dayofweek', ...]
        ('int', ['text_ngram'])             :  1 | ['__nlp__.suite']
    1.0s = Fit runtime
    20 features in original data used to generate 37 features in processed data.
    Train Data (Processed) Memory Usage: 2.49 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 1.0s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79fec10>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8876   = Validation score   (accuracy)
    0.01s    = Training   runtime
    0.14s    = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79fe1f0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.9252   = Validation score   (accuracy)
    0.01s    = Training   runtime
    0.11s    = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9704   = Validation score   (accuracy)
    3.02s    = Training   runtime
    0.45s    = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9712   = Validation score   (accuracy)
    7.16s    = Training   runtime
    0.54s    = Validation runtime
Fitting model: RandomForestGini_BAG_L1 ...
    0.9584   = Validation score   (accuracy)
    0.76s    = Training   runtime
    0.28s    = Validation runtime
Fitting model: RandomForestEntr_BAG_L1 ...
    0.9578   = Validation score   (accuracy)
    0.83s    = Training   runtime
    0.28s    = Validation runtime
Fitting model: CatBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9907   = Validation score   (accuracy)
    23.9s    = Training   runtime
    0.14s    = Validation runtime
Fitting model: ExtraTreesGini_BAG_L1 ...
    0.9662   = Validation score   (accuracy)
    0.42s    = Training   runtime
    0.33s    = Validation runtime
Fitting model: ExtraTreesEntr_BAG_L1 ...
    0.9657   = Validation score   (accuracy)
    0.42s    = Training   runtime
    0.32s    = Validation runtime
Fitting model: NeuralNetFastAI_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9455   = Validation score   (accuracy)
    24.07s   = Training   runtime
    0.27s    = Validation runtime
Fitting model: XGBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9752   = Validation score   (accuracy)
    10.86s   = Training   runtime
    0.19s    = Validation runtime
Fitting model: NeuralNetTorch_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9449   = Validation score   (accuracy)
    32.98s   = Training   runtime
    0.19s    = Validation runtime
Fitting model: LightGBMLarge_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9722   = Validation score   (accuracy)
    8.47s    = Training   runtime
    0.55s    = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.9944   = Validation score   (accuracy)
    2.77s    = Training   runtime
    0.01s    = Validation runtime
AutoGluon training complete, total runtime = 126.78s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240125_012327/")
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36b43e8940>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'

Autogluon(0.3 / 0.4)

# Experiment "Autogluon(0.3 / 0.4)": throw(..., 0.3) followed by
# split_dataframe(..., 0.4).
# NOTE(review): presumably 0.3 is the fraud ratio kept by `throw` and 0.4 the
# test fraction used by `split_dataframe` -- confirm against the helpers.
df = throw(fraudTrain, 0.3)
df_tr, df_tst = split_dataframe(df, 0.4)

# Wrap both partitions as AutoGluon tabular datasets.
tr, tst = TabularDataset(df_tr), TabularDataset(df_tst)

# Train a predictor for the `is_fraud` label with the 'best_quality' preset.
predictr = TabularPredictor(label="is_fraud")
predictr.fit(train_data=tr, presets="best_quality")

# Compare the held-out labels against the predictions.
y = tst["is_fraud"]
yhat = predictr.predict(tst)
result3 = evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240125_012536/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240125_012536/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   612.64 GB / 982.82 GB (62.3%)
Train Data Rows:    14014
Train Data Columns: 21
Label Column: is_fraud
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [1, 0]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    22858.13 MB
    Train Data (Original)  Memory Usage: 11.91 MB (0.1% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
            Note: Converting 1 features to boolean dtype as they only contain 2 unique values.
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
        Fitting CategoryFeatureGenerator...
            Fitting CategoryMemoryMinimizeFeatureGenerator...
        Fitting DatetimeFeatureGenerator...
        Fitting TextSpecialFeatureGenerator...
            Fitting BinnedFeatureGenerator...
            Fitting DropDuplicatesFeatureGenerator...
        Fitting TextNgramFeatureGenerator...
            Fitting CountVectorizer for text features: ['street']
            CountVectorizer fit with vocabulary size = 2
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Unused Original Features (Count: 1): ['trans_num']
        These features were not used to generate any of the output features. Add a feature generator compatible with these features to utilize them.
        Features can also be unused if they carry very little information, such as being categorical but having almost entirely unique values or being duplicates of other features.
        These features do not need to be present at inference time.
        ('object', []) : 1 | ['trans_num']
    Types of features in original data (raw dtype, special dtypes):
        ('datetime', [])                   : 1 | ['trans_date_trans_time']
        ('float', [])                      : 6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                        : 3 | ['zip', 'city_pop', 'unix_time']
        ('object', [])                     : 8 | ['merchant', 'category', 'first', 'last', 'gender', ...]
        ('object', ['datetime_as_object']) : 1 | ['dob']
        ('object', ['text'])               : 1 | ['street']
    Types of features in processed data (raw dtype, special dtypes):
        ('category', [])                    :  7 | ['merchant', 'category', 'first', 'last', 'city', ...]
        ('category', ['text_as_category'])  :  1 | ['street']
        ('float', [])                       :  6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                         :  3 | ['zip', 'city_pop', 'unix_time']
        ('int', ['binned', 'text_special']) :  8 | ['street.char_count', 'street.word_count', 'street.capital_ratio', 'street.lower_ratio', 'street.digit_ratio', ...]
        ('int', ['bool'])                   :  1 | ['gender']
        ('int', ['datetime_as_int'])        : 10 | ['trans_date_trans_time', 'trans_date_trans_time.year', 'trans_date_trans_time.month', 'trans_date_trans_time.day', 'trans_date_trans_time.dayofweek', ...]
        ('int', ['text_ngram'])             :  1 | ['__nlp__.suite']
    1.0s = Fit runtime
    20 features in original data used to generate 37 features in processed data.
    Train Data (Processed) Memory Usage: 2.49 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 1.02s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79feee0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8828   = Validation score   (accuracy)
    0.01s    = Training   runtime
    0.11s    = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79feb80>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.9244   = Validation score   (accuracy)
    0.01s    = Training   runtime
    0.11s    = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.975    = Validation score   (accuracy)
    6.79s    = Training   runtime
    0.69s    = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9717   = Validation score   (accuracy)
    3.98s    = Training   runtime
    0.31s    = Validation runtime
Fitting model: RandomForestGini_BAG_L1 ...
    0.9583   = Validation score   (accuracy)
    0.79s    = Training   runtime
    0.28s    = Validation runtime
Fitting model: RandomForestEntr_BAG_L1 ...
    0.958    = Validation score   (accuracy)
    0.81s    = Training   runtime
    0.28s    = Validation runtime
Fitting model: CatBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9905   = Validation score   (accuracy)
    31.22s   = Training   runtime
    0.13s    = Validation runtime
Fitting model: ExtraTreesGini_BAG_L1 ...
    0.9635   = Validation score   (accuracy)
    0.4s     = Training   runtime
    0.33s    = Validation runtime
Fitting model: ExtraTreesEntr_BAG_L1 ...
    0.9638   = Validation score   (accuracy)
    0.41s    = Training   runtime
    0.33s    = Validation runtime
Fitting model: NeuralNetFastAI_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9448   = Validation score   (accuracy)
    23.15s   = Training   runtime
    0.26s    = Validation runtime
Fitting model: XGBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9775   = Validation score   (accuracy)
    8.09s    = Training   runtime
    0.17s    = Validation runtime
Fitting model: NeuralNetTorch_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9452   = Validation score   (accuracy)
    43.09s   = Training   runtime
    0.19s    = Validation runtime
Fitting model: LightGBMLarge_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9732   = Validation score   (accuracy)
    7.67s    = Training   runtime
    0.46s    = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.9941   = Validation score   (accuracy)
    2.77s    = Training   runtime
    0.01s    = Validation runtime
AutoGluon training complete, total runtime = 139.96s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240125_012536/")
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36b4135310>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'

Autogluon(0.4 / 0.2)

# Experiment "Autogluon(0.4 / 0.2)".
# The parameters now follow the section heading, as the neighbouring cells do.
# This cell previously called throw(fraudTrain, 0.3) and
# split_dataframe(df, 0.3), so result4 did not measure the 0.4 / 0.2
# configuration it is labelled with.
# NOTE(review): presumably 0.4 is the fraud ratio kept by `throw` and 0.2 the
# test fraction used by `split_dataframe` -- confirm against the helpers.
# NOTE(review): re-run this cell; any captured output shown below it predates
# this parameter change.
df = throw(fraudTrain, 0.4)

df_tr, df_tst = split_dataframe(df, 0.2)

# Wrap both partitions as AutoGluon tabular datasets.
tr = TabularDataset(df_tr)
tst = TabularDataset(df_tst)

# Train a predictor for the `is_fraud` label with the 'best_quality' preset.
predictr = TabularPredictor(label="is_fraud")
predictr.fit(train_data=tr, presets="best_quality")

# Compare the held-out labels against the predictions.
y = tst["is_fraud"]
yhat = predictr.predict(tst)

result4 = evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240125_012758/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240125_012758/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   612.14 GB / 982.82 GB (62.3%)
Train Data Rows:    14014
Train Data Columns: 21
Label Column: is_fraud
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [1, 0]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    22836.61 MB
    Train Data (Original)  Memory Usage: 11.92 MB (0.1% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
            Note: Converting 1 features to boolean dtype as they only contain 2 unique values.
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
        Fitting CategoryFeatureGenerator...
            Fitting CategoryMemoryMinimizeFeatureGenerator...
        Fitting DatetimeFeatureGenerator...
        Fitting TextSpecialFeatureGenerator...
            Fitting BinnedFeatureGenerator...
            Fitting DropDuplicatesFeatureGenerator...
        Fitting TextNgramFeatureGenerator...
            Fitting CountVectorizer for text features: ['street']
            CountVectorizer fit with vocabulary size = 2
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Unused Original Features (Count: 1): ['trans_num']
        These features were not used to generate any of the output features. Add a feature generator compatible with these features to utilize them.
        Features can also be unused if they carry very little information, such as being categorical but having almost entirely unique values or being duplicates of other features.
        These features do not need to be present at inference time.
        ('object', []) : 1 | ['trans_num']
    Types of features in original data (raw dtype, special dtypes):
        ('datetime', [])                   : 1 | ['trans_date_trans_time']
        ('float', [])                      : 6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                        : 3 | ['zip', 'city_pop', 'unix_time']
        ('object', [])                     : 8 | ['merchant', 'category', 'first', 'last', 'gender', ...]
        ('object', ['datetime_as_object']) : 1 | ['dob']
        ('object', ['text'])               : 1 | ['street']
    Types of features in processed data (raw dtype, special dtypes):
        ('category', [])                    :  7 | ['merchant', 'category', 'first', 'last', 'city', ...]
        ('category', ['text_as_category'])  :  1 | ['street']
        ('float', [])                       :  6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                         :  3 | ['zip', 'city_pop', 'unix_time']
        ('int', ['binned', 'text_special']) :  8 | ['street.char_count', 'street.word_count', 'street.capital_ratio', 'street.lower_ratio', 'street.digit_ratio', ...]
        ('int', ['bool'])                   :  1 | ['gender']
        ('int', ['datetime_as_int'])        : 10 | ['trans_date_trans_time', 'trans_date_trans_time.year', 'trans_date_trans_time.month', 'trans_date_trans_time.day', 'trans_date_trans_time.dayofweek', ...]
        ('int', ['text_ngram'])             :  1 | ['__nlp__.suite']
    1.0s = Fit runtime
    20 features in original data used to generate 37 features in processed data.
    Train Data (Processed) Memory Usage: 2.49 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 1.04s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79fedc0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.887    = Validation score   (accuracy)
    0.01s    = Training   runtime
    0.35s    = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79fedc0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.9231   = Validation score   (accuracy)
    0.01s    = Training   runtime
    0.16s    = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9711   = Validation score   (accuracy)
    3.84s    = Training   runtime
    0.46s    = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9701   = Validation score   (accuracy)
    5.64s    = Training   runtime
    0.4s     = Validation runtime
Fitting model: RandomForestGini_BAG_L1 ...
    0.9575   = Validation score   (accuracy)
    0.74s    = Training   runtime
    0.29s    = Validation runtime
Fitting model: RandomForestEntr_BAG_L1 ...
    0.9573   = Validation score   (accuracy)
    0.82s    = Training   runtime
    0.28s    = Validation runtime
Fitting model: CatBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9909   = Validation score   (accuracy)
    34.52s   = Training   runtime
    0.14s    = Validation runtime
Fitting model: ExtraTreesGini_BAG_L1 ...
    0.9647   = Validation score   (accuracy)
    0.4s     = Training   runtime
    0.33s    = Validation runtime
Fitting model: ExtraTreesEntr_BAG_L1 ...
    0.9652   = Validation score   (accuracy)
    0.43s    = Training   runtime
    0.33s    = Validation runtime
Fitting model: NeuralNetFastAI_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.947    = Validation score   (accuracy)
    23.44s   = Training   runtime
    0.27s    = Validation runtime
Fitting model: XGBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.975    = Validation score   (accuracy)
    10.07s   = Training   runtime
    0.19s    = Validation runtime
Fitting model: NeuralNetTorch_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9441   = Validation score   (accuracy)
    31.0s    = Training   runtime
    0.2s     = Validation runtime
Fitting model: LightGBMLarge_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9707   = Validation score   (accuracy)
    8.44s    = Training   runtime
    0.39s    = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.9947   = Validation score   (accuracy)
    2.9s     = Training   runtime
    0.02s    = Validation runtime
AutoGluon training complete, total runtime = 133.53s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240125_012758/")
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36b41353a0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'

Autogluon(0.4 / 0.3)

# Experiment "Autogluon(0.4 / 0.3)": throw(..., 0.4) followed by
# split_dataframe(..., 0.3).
# NOTE(review): presumably 0.4 is the fraud ratio kept by `throw` and 0.3 the
# test fraction used by `split_dataframe` -- confirm against the helpers.
df = throw(fraudTrain, 0.4)
df_tr, df_tst = split_dataframe(df, 0.3)

# Wrap both partitions as AutoGluon tabular datasets.
tr, tst = TabularDataset(df_tr), TabularDataset(df_tst)

# Train a predictor for the `is_fraud` label with the 'best_quality' preset.
predictr = TabularPredictor(label="is_fraud")
predictr.fit(train_data=tr, presets="best_quality")

# Compare the held-out labels against the predictions.
y = tst["is_fraud"]
yhat = predictr.predict(tst)
result5 = evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240125_013012/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240125_013012/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   611.63 GB / 982.82 GB (62.2%)
Train Data Rows:    10511
Train Data Columns: 21
Label Column: is_fraud
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [1, 0]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    22823.64 MB
    Train Data (Original)  Memory Usage: 8.94 MB (0.0% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
            Note: Converting 1 features to boolean dtype as they only contain 2 unique values.
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
        Fitting CategoryFeatureGenerator...
            Fitting CategoryMemoryMinimizeFeatureGenerator...
        Fitting DatetimeFeatureGenerator...
        Fitting TextSpecialFeatureGenerator...
            Fitting BinnedFeatureGenerator...
            Fitting DropDuplicatesFeatureGenerator...
        Fitting TextNgramFeatureGenerator...
            Fitting CountVectorizer for text features: ['street']
            CountVectorizer fit with vocabulary size = 2
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Unused Original Features (Count: 1): ['trans_num']
        These features were not used to generate any of the output features. Add a feature generator compatible with these features to utilize them.
        Features can also be unused if they carry very little information, such as being categorical but having almost entirely unique values or being duplicates of other features.
        These features do not need to be present at inference time.
        ('object', []) : 1 | ['trans_num']
    Types of features in original data (raw dtype, special dtypes):
        ('datetime', [])                   : 1 | ['trans_date_trans_time']
        ('float', [])                      : 6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                        : 3 | ['zip', 'city_pop', 'unix_time']
        ('object', [])                     : 8 | ['merchant', 'category', 'first', 'last', 'gender', ...]
        ('object', ['datetime_as_object']) : 1 | ['dob']
        ('object', ['text'])               : 1 | ['street']
    Types of features in processed data (raw dtype, special dtypes):
        ('category', [])                    :  7 | ['merchant', 'category', 'first', 'last', 'city', ...]
        ('category', ['text_as_category'])  :  1 | ['street']
        ('float', [])                       :  6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                         :  3 | ['zip', 'city_pop', 'unix_time']
        ('int', ['binned', 'text_special']) :  8 | ['street.char_count', 'street.word_count', 'street.capital_ratio', 'street.lower_ratio', 'street.digit_ratio', ...]
        ('int', ['bool'])                   :  1 | ['gender']
        ('int', ['datetime_as_int'])        : 10 | ['trans_date_trans_time', 'trans_date_trans_time.year', 'trans_date_trans_time.month', 'trans_date_trans_time.day', 'trans_date_trans_time.dayofweek', ...]
        ('int', ['text_ngram'])             :  1 | ['__nlp__.suite']
    1.1s = Fit runtime
    20 features in original data used to generate 37 features in processed data.
    Train Data (Processed) Memory Usage: 1.87 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 1.07s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79fe670>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8668   = Validation score   (accuracy)
    0.01s    = Training   runtime
    0.11s    = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79fe670>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.906    = Validation score   (accuracy)
    0.01s    = Training   runtime
    0.09s    = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9658   = Validation score   (accuracy)
    2.75s    = Training   runtime
    0.3s     = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9659   = Validation score   (accuracy)
    3.75s    = Training   runtime
    0.12s    = Validation runtime
Fitting model: RandomForestGini_BAG_L1 ...
    0.955    = Validation score   (accuracy)
    0.6s     = Training   runtime
    0.25s    = Validation runtime
Fitting model: RandomForestEntr_BAG_L1 ...
    0.9554   = Validation score   (accuracy)
    0.66s    = Training   runtime
    0.24s    = Validation runtime
Fitting model: CatBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9912   = Validation score   (accuracy)
    22.41s   = Training   runtime
    0.13s    = Validation runtime
Fitting model: ExtraTreesGini_BAG_L1 ...
    0.9605   = Validation score   (accuracy)
    0.35s    = Training   runtime
    0.27s    = Validation runtime
Fitting model: ExtraTreesEntr_BAG_L1 ...
    0.9602   = Validation score   (accuracy)
    0.36s    = Training   runtime
    0.27s    = Validation runtime
Fitting model: NeuralNetFastAI_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9343   = Validation score   (accuracy)
    17.51s   = Training   runtime
    0.22s    = Validation runtime
Fitting model: XGBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9746   = Validation score   (accuracy)
    8.34s    = Training   runtime
    0.16s    = Validation runtime
Fitting model: NeuralNetTorch_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9338   = Validation score   (accuracy)
    45.3s    = Training   runtime
    0.16s    = Validation runtime
Fitting model: LightGBMLarge_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9667   = Validation score   (accuracy)
    5.89s    = Training   runtime
    0.2s     = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.9935   = Validation score   (accuracy)
    2.23s    = Training   runtime
    0.01s    = Validation runtime
AutoGluon training complete, total runtime = 120.81s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240125_013012/")
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79ff430>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'

Autogluon(0.4 / 0.4)

# Experiment cell for the "0.4 / 0.4" setting: the result is kept as `result6`.
# NOTE(review): `throw` and `split_dataframe` are defined elsewhere in this
# notebook; the two 0.4 values are presumably fraud ratios (resampled data /
# test split) rather than a split size -- confirm against their definitions.
df = throw(fraudTrain, 0.4)
df_tr, df_tst = split_dataframe(df, 0.4)

# Wrap the train and test frames as AutoGluon tabular datasets.
tr, tst = TabularDataset(df_tr), TabularDataset(df_tst)

# Train a predictor for the `is_fraud` label with the 'best_quality' preset.
predictr = TabularPredictor(label="is_fraud")
predictr.fit(tr, presets='best_quality')

# Score the predictions on the test split against its true labels.
y = tst["is_fraud"]
yhat = predictr.predict(tst)
result6 = evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240125_013214/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240125_013214/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   611.23 GB / 982.82 GB (62.2%)
Train Data Rows:    10511
Train Data Columns: 21
Label Column: is_fraud
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [1, 0]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    22798.57 MB
    Train Data (Original)  Memory Usage: 8.93 MB (0.0% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
            Note: Converting 1 features to boolean dtype as they only contain 2 unique values.
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
        Fitting CategoryFeatureGenerator...
            Fitting CategoryMemoryMinimizeFeatureGenerator...
        Fitting DatetimeFeatureGenerator...
        Fitting TextSpecialFeatureGenerator...
            Fitting BinnedFeatureGenerator...
            Fitting DropDuplicatesFeatureGenerator...
        Fitting TextNgramFeatureGenerator...
            Fitting CountVectorizer for text features: ['street']
            CountVectorizer fit with vocabulary size = 2
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Unused Original Features (Count: 1): ['trans_num']
        These features were not used to generate any of the output features. Add a feature generator compatible with these features to utilize them.
        Features can also be unused if they carry very little information, such as being categorical but having almost entirely unique values or being duplicates of other features.
        These features do not need to be present at inference time.
        ('object', []) : 1 | ['trans_num']
    Types of features in original data (raw dtype, special dtypes):
        ('datetime', [])                   : 1 | ['trans_date_trans_time']
        ('float', [])                      : 6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                        : 3 | ['zip', 'city_pop', 'unix_time']
        ('object', [])                     : 8 | ['merchant', 'category', 'first', 'last', 'gender', ...]
        ('object', ['datetime_as_object']) : 1 | ['dob']
        ('object', ['text'])               : 1 | ['street']
    Types of features in processed data (raw dtype, special dtypes):
        ('category', [])                    :  7 | ['merchant', 'category', 'first', 'last', 'city', ...]
        ('category', ['text_as_category'])  :  1 | ['street']
        ('float', [])                       :  6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                         :  3 | ['zip', 'city_pop', 'unix_time']
        ('int', ['binned', 'text_special']) :  8 | ['street.char_count', 'street.word_count', 'street.capital_ratio', 'street.lower_ratio', 'street.digit_ratio', ...]
        ('int', ['bool'])                   :  1 | ['gender']
        ('int', ['datetime_as_int'])        : 10 | ['trans_date_trans_time', 'trans_date_trans_time.year', 'trans_date_trans_time.month', 'trans_date_trans_time.day', 'trans_date_trans_time.dayofweek', ...]
        ('int', ['text_ngram'])             :  1 | ['__nlp__.suite']
    1.0s = Fit runtime
    20 features in original data used to generate 37 features in processed data.
    Train Data (Processed) Memory Usage: 1.87 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.99s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79ffc10>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8626   = Validation score   (accuracy)
    0.01s    = Training   runtime
    0.1s     = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79ffee0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.904    = Validation score   (accuracy)
    0.01s    = Training   runtime
    0.07s    = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9635   = Validation score   (accuracy)
    2.67s    = Training   runtime
    0.28s    = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9634   = Validation score   (accuracy)
    3.28s    = Training   runtime
    0.18s    = Validation runtime
Fitting model: RandomForestGini_BAG_L1 ...
    0.9537   = Validation score   (accuracy)
    0.62s    = Training   runtime
    0.24s    = Validation runtime
Fitting model: RandomForestEntr_BAG_L1 ...
    0.9522   = Validation score   (accuracy)
    0.67s    = Training   runtime
    0.24s    = Validation runtime
Fitting model: CatBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9892   = Validation score   (accuracy)
    17.0s    = Training   runtime
    0.13s    = Validation runtime
Fitting model: ExtraTreesGini_BAG_L1 ...
    0.9617   = Validation score   (accuracy)
    0.35s    = Training   runtime
    0.28s    = Validation runtime
Fitting model: ExtraTreesEntr_BAG_L1 ...
    0.9606   = Validation score   (accuracy)
    0.36s    = Training   runtime
    0.27s    = Validation runtime
Fitting model: NeuralNetFastAI_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9284   = Validation score   (accuracy)
    17.85s   = Training   runtime
    0.23s    = Validation runtime
Fitting model: XGBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9701   = Validation score   (accuracy)
    9.41s    = Training   runtime
    0.17s    = Validation runtime
Fitting model: NeuralNetTorch_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9325   = Validation score   (accuracy)
    38.1s    = Training   runtime
    0.17s    = Validation runtime
Fitting model: LightGBMLarge_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9629   = Validation score   (accuracy)
    5.73s    = Training   runtime
    0.18s    = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.9932   = Validation score   (accuracy)
    2.16s    = Training   runtime
    0.01s    = Validation runtime
AutoGluon training complete, total runtime = 108.04s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240125_013214/")
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36bfee71f0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'

Autogluon(0.5 / 0.2)

# Experiment cell for the "0.5 / 0.2" setting: the result is kept as `result7`.
# NOTE(review): `throw` and `split_dataframe` are defined elsewhere in this
# notebook; 0.5 and 0.2 are presumably fraud ratios (resampled data / test
# split) rather than a split size -- confirm against their definitions.
df = throw(fraudTrain, 0.5)
df_tr, df_tst = split_dataframe(df, 0.2)

# Wrap the train and test frames as AutoGluon tabular datasets.
tr, tst = TabularDataset(df_tr), TabularDataset(df_tst)

# Train a predictor for the `is_fraud` label with the 'best_quality' preset.
predictr = TabularPredictor(label="is_fraud")
predictr.fit(tr, presets='best_quality')

# Score the predictions on the test split against its true labels.
y = tst["is_fraud"]
yhat = predictr.predict(tst)
result7 = evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240125_013403/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240125_013403/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   610.84 GB / 982.82 GB (62.2%)
Train Data Rows:    8409
Train Data Columns: 21
Label Column: is_fraud
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [1, 0]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    22790.9 MB
    Train Data (Original)  Memory Usage: 7.14 MB (0.0% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
            Note: Converting 1 features to boolean dtype as they only contain 2 unique values.
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
        Fitting CategoryFeatureGenerator...
            Fitting CategoryMemoryMinimizeFeatureGenerator...
        Fitting DatetimeFeatureGenerator...
        Fitting TextSpecialFeatureGenerator...
            Fitting BinnedFeatureGenerator...
            Fitting DropDuplicatesFeatureGenerator...
        Fitting TextNgramFeatureGenerator...
            Fitting CountVectorizer for text features: ['street']
            CountVectorizer fit with vocabulary size = 2
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Unused Original Features (Count: 1): ['trans_num']
        These features were not used to generate any of the output features. Add a feature generator compatible with these features to utilize them.
        Features can also be unused if they carry very little information, such as being categorical but having almost entirely unique values or being duplicates of other features.
        These features do not need to be present at inference time.
        ('object', []) : 1 | ['trans_num']
    Types of features in original data (raw dtype, special dtypes):
        ('datetime', [])                   : 1 | ['trans_date_trans_time']
        ('float', [])                      : 6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                        : 3 | ['zip', 'city_pop', 'unix_time']
        ('object', [])                     : 8 | ['merchant', 'category', 'first', 'last', 'gender', ...]
        ('object', ['datetime_as_object']) : 1 | ['dob']
        ('object', ['text'])               : 1 | ['street']
    Types of features in processed data (raw dtype, special dtypes):
        ('category', [])                    :  7 | ['merchant', 'category', 'first', 'last', 'city', ...]
        ('category', ['text_as_category'])  :  1 | ['street']
        ('float', [])                       :  6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                         :  3 | ['zip', 'city_pop', 'unix_time']
        ('int', ['binned', 'text_special']) :  8 | ['street.char_count', 'street.word_count', 'street.capital_ratio', 'street.lower_ratio', 'street.digit_ratio', ...]
        ('int', ['bool'])                   :  1 | ['gender']
        ('int', ['datetime_as_int'])        : 10 | ['trans_date_trans_time', 'trans_date_trans_time.year', 'trans_date_trans_time.month', 'trans_date_trans_time.day', 'trans_date_trans_time.dayofweek', ...]
        ('int', ['text_ngram'])             :  1 | ['__nlp__.suite']
    0.9s = Fit runtime
    20 features in original data used to generate 37 features in processed data.
    Train Data (Processed) Memory Usage: 1.49 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.92s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36aff56f70>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8533   = Validation score   (accuracy)
    0.0s     = Training   runtime
    0.08s    = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36aff56040>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8932   = Validation score   (accuracy)
    0.0s     = Training   runtime
    0.07s    = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9612   = Validation score   (accuracy)
    2.25s    = Training   runtime
    0.22s    = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9662   = Validation score   (accuracy)
    1.32s    = Training   runtime
    0.05s    = Validation runtime
Fitting model: RandomForestGini_BAG_L1 ...
    0.9583   = Validation score   (accuracy)
    0.56s    = Training   runtime
    0.24s    = Validation runtime
Fitting model: RandomForestEntr_BAG_L1 ...
    0.9578   = Validation score   (accuracy)
    0.61s    = Training   runtime
    0.2s     = Validation runtime
Fitting model: CatBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9904   = Validation score   (accuracy)
    16.22s   = Training   runtime
    0.11s    = Validation runtime
Fitting model: ExtraTreesGini_BAG_L1 ...
    0.964    = Validation score   (accuracy)
    0.33s    = Training   runtime
    0.23s    = Validation runtime
Fitting model: ExtraTreesEntr_BAG_L1 ...
    0.9634   = Validation score   (accuracy)
    0.33s    = Training   runtime
    0.23s    = Validation runtime
Fitting model: NeuralNetFastAI_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9288   = Validation score   (accuracy)
    14.34s   = Training   runtime
    0.19s    = Validation runtime
Fitting model: XGBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9743   = Validation score   (accuracy)
    5.61s    = Training   runtime
    0.12s    = Validation runtime
Fitting model: NeuralNetTorch_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9231   = Validation score   (accuracy)
    35.56s   = Training   runtime
    0.15s    = Validation runtime
Fitting model: LightGBMLarge_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9644   = Validation score   (accuracy)
    4.88s    = Training   runtime
    0.16s    = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.9925   = Validation score   (accuracy)
    1.84s    = Training   runtime
    0.01s    = Validation runtime
AutoGluon training complete, total runtime = 93.94s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240125_013403/")
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79fea60>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'

Autogluon(0.5 / 0.3)

# Experiment cell for the "0.5 / 0.3" setting: the result is kept as `result8`.
# NOTE(review): `throw` and `split_dataframe` are defined elsewhere in this
# notebook; 0.5 and 0.3 are presumably fraud ratios (resampled data / test
# split) rather than a split size -- confirm against their definitions.
df = throw(fraudTrain, 0.5)
df_tr, df_tst = split_dataframe(df, 0.3)

# Wrap the train and test frames as AutoGluon tabular datasets.
tr, tst = TabularDataset(df_tr), TabularDataset(df_tst)

# Train a predictor for the `is_fraud` label with the 'best_quality' preset.
predictr = TabularPredictor(label="is_fraud")
predictr.fit(tr, presets='best_quality')

# Score the predictions on the test split against its true labels.
y = tst["is_fraud"]
yhat = predictr.predict(tst)
result8 = evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240125_013538/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240125_013538/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   610.51 GB / 982.82 GB (62.1%)
Train Data Rows:    8409
Train Data Columns: 21
Label Column: is_fraud
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [1, 0]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    22799.64 MB
    Train Data (Original)  Memory Usage: 7.14 MB (0.0% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
            Note: Converting 1 features to boolean dtype as they only contain 2 unique values.
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
        Fitting CategoryFeatureGenerator...
            Fitting CategoryMemoryMinimizeFeatureGenerator...
        Fitting DatetimeFeatureGenerator...
        Fitting TextSpecialFeatureGenerator...
            Fitting BinnedFeatureGenerator...
            Fitting DropDuplicatesFeatureGenerator...
        Fitting TextNgramFeatureGenerator...
            Fitting CountVectorizer for text features: ['street']
            CountVectorizer fit with vocabulary size = 2
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Unused Original Features (Count: 1): ['trans_num']
        These features were not used to generate any of the output features. Add a feature generator compatible with these features to utilize them.
        Features can also be unused if they carry very little information, such as being categorical but having almost entirely unique values or being duplicates of other features.
        These features do not need to be present at inference time.
        ('object', []) : 1 | ['trans_num']
    Types of features in original data (raw dtype, special dtypes):
        ('datetime', [])                   : 1 | ['trans_date_trans_time']
        ('float', [])                      : 6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                        : 3 | ['zip', 'city_pop', 'unix_time']
        ('object', [])                     : 8 | ['merchant', 'category', 'first', 'last', 'gender', ...]
        ('object', ['datetime_as_object']) : 1 | ['dob']
        ('object', ['text'])               : 1 | ['street']
    Types of features in processed data (raw dtype, special dtypes):
        ('category', [])                    :  7 | ['merchant', 'category', 'first', 'last', 'city', ...]
        ('category', ['text_as_category'])  :  1 | ['street']
        ('float', [])                       :  6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                         :  3 | ['zip', 'city_pop', 'unix_time']
        ('int', ['binned', 'text_special']) :  8 | ['street.char_count', 'street.word_count', 'street.capital_ratio', 'street.lower_ratio', 'street.digit_ratio', ...]
        ('int', ['bool'])                   :  1 | ['gender']
        ('int', ['datetime_as_int'])        : 10 | ['trans_date_trans_time', 'trans_date_trans_time.year', 'trans_date_trans_time.month', 'trans_date_trans_time.day', 'trans_date_trans_time.dayofweek', ...]
        ('int', ['text_ngram'])             :  1 | ['__nlp__.suite']
    0.9s = Fit runtime
    20 features in original data used to generate 37 features in processed data.
    Train Data (Processed) Memory Usage: 1.49 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.91s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79feca0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8465   = Validation score   (accuracy)
    0.0s     = Training   runtime
    0.09s    = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79fe8b0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.889    = Validation score   (accuracy)
    0.0s     = Training   runtime
    0.05s    = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9597   = Validation score   (accuracy)
    2.19s    = Training   runtime
    0.23s    = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9641   = Validation score   (accuracy)
    2.39s    = Training   runtime
    0.1s     = Validation runtime
Fitting model: RandomForestGini_BAG_L1 ...
    0.9531   = Validation score   (accuracy)
    0.55s    = Training   runtime
    0.22s    = Validation runtime
Fitting model: RandomForestEntr_BAG_L1 ...
    0.955    = Validation score   (accuracy)
    0.54s    = Training   runtime
    0.2s     = Validation runtime
Fitting model: CatBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9885   = Validation score   (accuracy)
    17.61s   = Training   runtime
    0.12s    = Validation runtime
Fitting model: ExtraTreesGini_BAG_L1 ...
    0.9656   = Validation score   (accuracy)
    0.33s    = Training   runtime
    0.23s    = Validation runtime
Fitting model: ExtraTreesEntr_BAG_L1 ...
    0.9661   = Validation score   (accuracy)
    0.32s    = Training   runtime
    0.27s    = Validation runtime
Fitting model: NeuralNetFastAI_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9235   = Validation score   (accuracy)
    14.51s   = Training   runtime
    0.18s    = Validation runtime
Fitting model: XGBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9716   = Validation score   (accuracy)
    5.66s    = Training   runtime
    0.13s    = Validation runtime
Fitting model: NeuralNetTorch_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9229   = Validation score   (accuracy)
    34.19s   = Training   runtime
    0.15s    = Validation runtime
Fitting model: LightGBMLarge_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9596   = Validation score   (accuracy)
    4.95s    = Training   runtime
    0.13s    = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.9906   = Validation score   (accuracy)
    1.84s    = Training   runtime
    0.01s    = Validation runtime
AutoGluon training complete, total runtime = 94.88s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240125_013538/")
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36b4135430>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'

Autogluon(0.5 / 0.4)

# Experiment "Autogluon(0.5 / 0.4)": fit AutoGluon on one partition of a
# resampled frame and score its predictions on the other partition.
# NOTE(review): `throw`, `split_dataframe`, `evaluation` and `fraudTrain` come
# from earlier notebook cells; 0.5 and 0.4 are presumably the two ratios named
# in the section heading -- confirm against those helpers.
df = throw(fraudTrain, 0.5)
df_tr, df_tst = split_dataframe(df, 0.4)

# Wrap both partitions in AutoGluon's dataset type.
tr, tst = TabularDataset(df_tr), TabularDataset(df_tst)

# "is_fraud" is the label column the predictor learns to predict.
predictr = TabularPredictor(label="is_fraud")
predictr.fit(train_data=tr, presets="best_quality")

# True labels of the test partition vs. the labels predicted for it.
y = tst["is_fraud"]
yhat = predictr.predict(tst)

result9 = evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240125_013851/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240125_013851/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   609.84 GB / 982.82 GB (62.1%)
Train Data Rows:    8409
Train Data Columns: 21
Label Column: is_fraud
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [1, 0]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    22734.13 MB
    Train Data (Original)  Memory Usage: 7.14 MB (0.0% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
            Note: Converting 1 features to boolean dtype as they only contain 2 unique values.
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
        Fitting CategoryFeatureGenerator...
            Fitting CategoryMemoryMinimizeFeatureGenerator...
        Fitting DatetimeFeatureGenerator...
        Fitting TextSpecialFeatureGenerator...
            Fitting BinnedFeatureGenerator...
            Fitting DropDuplicatesFeatureGenerator...
        Fitting TextNgramFeatureGenerator...
            Fitting CountVectorizer for text features: ['street']
            CountVectorizer fit with vocabulary size = 2
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Unused Original Features (Count: 1): ['trans_num']
        These features were not used to generate any of the output features. Add a feature generator compatible with these features to utilize them.
        Features can also be unused if they carry very little information, such as being categorical but having almost entirely unique values or being duplicates of other features.
        These features do not need to be present at inference time.
        ('object', []) : 1 | ['trans_num']
    Types of features in original data (raw dtype, special dtypes):
        ('datetime', [])                   : 1 | ['trans_date_trans_time']
        ('float', [])                      : 6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                        : 3 | ['zip', 'city_pop', 'unix_time']
        ('object', [])                     : 8 | ['merchant', 'category', 'first', 'last', 'gender', ...]
        ('object', ['datetime_as_object']) : 1 | ['dob']
        ('object', ['text'])               : 1 | ['street']
    Types of features in processed data (raw dtype, special dtypes):
        ('category', [])                    :  7 | ['merchant', 'category', 'first', 'last', 'city', ...]
        ('category', ['text_as_category'])  :  1 | ['street']
        ('float', [])                       :  6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                         :  3 | ['zip', 'city_pop', 'unix_time']
        ('int', ['binned', 'text_special']) :  8 | ['street.char_count', 'street.word_count', 'street.capital_ratio', 'street.lower_ratio', 'street.digit_ratio', ...]
        ('int', ['bool'])                   :  1 | ['gender']
        ('int', ['datetime_as_int'])        : 10 | ['trans_date_trans_time', 'trans_date_trans_time.year', 'trans_date_trans_time.month', 'trans_date_trans_time.day', 'trans_date_trans_time.dayofweek', ...]
        ('int', ['text_ngram'])             :  1 | ['__nlp__.suite']
    0.9s = Fit runtime
    20 features in original data used to generate 37 features in processed data.
    Train Data (Processed) Memory Usage: 1.49 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.91s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36aff56040>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8385   = Validation score   (accuracy)
    0.0s     = Training   runtime
    0.09s    = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36aff560d0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8849   = Validation score   (accuracy)
    0.0s     = Training   runtime
    0.07s    = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9593   = Validation score   (accuracy)
    2.03s    = Training   runtime
    0.19s    = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9634   = Validation score   (accuracy)
    1.78s    = Training   runtime
    0.06s    = Validation runtime
Fitting model: RandomForestGini_BAG_L1 ...
    0.9543   = Validation score   (accuracy)
    0.51s    = Training   runtime
    0.21s    = Validation runtime
Fitting model: RandomForestEntr_BAG_L1 ...
    0.9548   = Validation score   (accuracy)
    0.57s    = Training   runtime
    0.21s    = Validation runtime
Fitting model: CatBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9881   = Validation score   (accuracy)
    15.56s   = Training   runtime
    0.12s    = Validation runtime
Fitting model: ExtraTreesGini_BAG_L1 ...
    0.9636   = Validation score   (accuracy)
    0.34s    = Training   runtime
    0.24s    = Validation runtime
Fitting model: ExtraTreesEntr_BAG_L1 ...
    0.9622   = Validation score   (accuracy)
    0.35s    = Training   runtime
    0.23s    = Validation runtime
Fitting model: NeuralNetFastAI_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9208   = Validation score   (accuracy)
    14.8s    = Training   runtime
    0.2s     = Validation runtime
Fitting model: XGBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9705   = Validation score   (accuracy)
    5.51s    = Training   runtime
    0.12s    = Validation runtime
Fitting model: NeuralNetTorch_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9289   = Validation score   (accuracy)
    35.44s   = Training   runtime
    0.15s    = Validation runtime
Fitting model: LightGBMLarge_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9605   = Validation score   (accuracy)
    4.89s    = Training   runtime
    0.14s    = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.9906   = Validation score   (accuracy)
    1.82s    = Training   runtime
    0.01s    = Validation runtime
AutoGluon training complete, total runtime = 93.62s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240125_013851/")
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79908b0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'

Autogluon(0.5 / 0.5)

# Experiment "Autogluon(0.5 / 0.5)": fit AutoGluon on one partition of a
# resampled frame and score its predictions on the other partition.
# NOTE(review): `throw`, `split_dataframe`, `evaluation` and `fraudTrain` come
# from earlier notebook cells; the two 0.5 arguments are presumably the ratios
# named in the section heading -- confirm against those helpers.
df = throw(fraudTrain, 0.5)
df_tr, df_tst = split_dataframe(df, 0.5)

# Wrap both partitions in AutoGluon's dataset type.
tr, tst = TabularDataset(df_tr), TabularDataset(df_tst)

# "is_fraud" is the label column the predictor learns to predict.
predictr = TabularPredictor(label="is_fraud")
predictr.fit(train_data=tr, presets="best_quality")

# True labels of the test partition vs. the labels predicted for it.
y = tst["is_fraud"]
yhat = predictr.predict(tst)

result10 = evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240125_014026/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240125_014026/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   609.52 GB / 982.82 GB (62.0%)
Train Data Rows:    8409
Train Data Columns: 21
Label Column: is_fraud
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [1, 0]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    22734.36 MB
    Train Data (Original)  Memory Usage: 7.15 MB (0.0% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
            Note: Converting 1 features to boolean dtype as they only contain 2 unique values.
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
        Fitting CategoryFeatureGenerator...
            Fitting CategoryMemoryMinimizeFeatureGenerator...
        Fitting DatetimeFeatureGenerator...
        Fitting TextSpecialFeatureGenerator...
            Fitting BinnedFeatureGenerator...
            Fitting DropDuplicatesFeatureGenerator...
        Fitting TextNgramFeatureGenerator...
            Fitting CountVectorizer for text features: ['street']
            CountVectorizer fit with vocabulary size = 2
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Unused Original Features (Count: 1): ['trans_num']
        These features were not used to generate any of the output features. Add a feature generator compatible with these features to utilize them.
        Features can also be unused if they carry very little information, such as being categorical but having almost entirely unique values or being duplicates of other features.
        These features do not need to be present at inference time.
        ('object', []) : 1 | ['trans_num']
    Types of features in original data (raw dtype, special dtypes):
        ('datetime', [])                   : 1 | ['trans_date_trans_time']
        ('float', [])                      : 6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                        : 3 | ['zip', 'city_pop', 'unix_time']
        ('object', [])                     : 8 | ['merchant', 'category', 'first', 'last', 'gender', ...]
        ('object', ['datetime_as_object']) : 1 | ['dob']
        ('object', ['text'])               : 1 | ['street']
    Types of features in processed data (raw dtype, special dtypes):
        ('category', [])                    :  7 | ['merchant', 'category', 'first', 'last', 'city', ...]
        ('category', ['text_as_category'])  :  1 | ['street']
        ('float', [])                       :  6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                         :  3 | ['zip', 'city_pop', 'unix_time']
        ('int', ['binned', 'text_special']) :  8 | ['street.char_count', 'street.word_count', 'street.capital_ratio', 'street.lower_ratio', 'street.digit_ratio', ...]
        ('int', ['bool'])                   :  1 | ['gender']
        ('int', ['datetime_as_int'])        : 10 | ['trans_date_trans_time', 'trans_date_trans_time.year', 'trans_date_trans_time.month', 'trans_date_trans_time.day', 'trans_date_trans_time.dayofweek', ...]
        ('int', ['text_ngram'])             :  1 | ['__nlp__.suite']
    0.9s = Fit runtime
    20 features in original data used to generate 37 features in processed data.
    Train Data (Processed) Memory Usage: 1.49 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.93s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79ff700>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8377   = Validation score   (accuracy)
    0.01s    = Training   runtime
    0.06s    = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79ff1f0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8861   = Validation score   (accuracy)
    0.01s    = Training   runtime
    0.05s    = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.958    = Validation score   (accuracy)
    2.14s    = Training   runtime
    0.22s    = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9602   = Validation score   (accuracy)
    1.3s     = Training   runtime
    0.04s    = Validation runtime
Fitting model: RandomForestGini_BAG_L1 ...
    0.9487   = Validation score   (accuracy)
    0.53s    = Training   runtime
    0.21s    = Validation runtime
Fitting model: RandomForestEntr_BAG_L1 ...
    0.9478   = Validation score   (accuracy)
    0.66s    = Training   runtime
    0.2s     = Validation runtime
Fitting model: CatBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9881   = Validation score   (accuracy)
    13.05s   = Training   runtime
    0.12s    = Validation runtime
Fitting model: ExtraTreesGini_BAG_L1 ...
    0.9594   = Validation score   (accuracy)
    0.35s    = Training   runtime
    0.23s    = Validation runtime
Fitting model: ExtraTreesEntr_BAG_L1 ...
    0.9621   = Validation score   (accuracy)
    0.35s    = Training   runtime
    0.23s    = Validation runtime
Fitting model: NeuralNetFastAI_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9218   = Validation score   (accuracy)
    14.28s   = Training   runtime
    0.2s     = Validation runtime
Fitting model: XGBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9702   = Validation score   (accuracy)
    4.76s    = Training   runtime
    0.11s    = Validation runtime
Fitting model: NeuralNetTorch_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9258   = Validation score   (accuracy)
    33.2s    = Training   runtime
    0.16s    = Validation runtime
Fitting model: LightGBMLarge_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9584   = Validation score   (accuracy)
    4.14s    = Training   runtime
    0.08s    = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.9919   = Validation score   (accuracy)
    1.83s    = Training   runtime
    0.01s    = Validation runtime
AutoGluon training complete, total runtime = 86.65s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240125_014026/")
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36aff56a60>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'

Autogluon(0.6 / 0.2)

# Experiment "Autogluon(0.6 / 0.2)": fit AutoGluon on one partition of a
# resampled frame and score its predictions on the other partition.
# NOTE(review): `throw`, `split_dataframe`, `evaluation` and `fraudTrain` come
# from earlier notebook cells; 0.6 and 0.2 are presumably the two ratios named
# in the section heading -- confirm against those helpers.
df = throw(fraudTrain, 0.6)
df_tr, df_tst = split_dataframe(df, 0.2)

# Wrap both partitions in AutoGluon's dataset type.
tr, tst = TabularDataset(df_tr), TabularDataset(df_tst)

# "is_fraud" is the label column the predictor learns to predict.
predictr = TabularPredictor(label="is_fraud")
predictr.fit(train_data=tr, presets="best_quality")

# True labels of the test partition vs. the labels predicted for it.
y = tst["is_fraud"]
yhat = predictr.predict(tst)

result11 = evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240125_014153/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240125_014153/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   609.21 GB / 982.82 GB (62.0%)
Train Data Rows:    7007
Train Data Columns: 21
Label Column: is_fraud
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [1, 0]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    22688.56 MB
    Train Data (Original)  Memory Usage: 5.95 MB (0.0% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
            Note: Converting 1 features to boolean dtype as they only contain 2 unique values.
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
        Fitting CategoryFeatureGenerator...
            Fitting CategoryMemoryMinimizeFeatureGenerator...
        Fitting DatetimeFeatureGenerator...
        Fitting TextSpecialFeatureGenerator...
            Fitting BinnedFeatureGenerator...
            Fitting DropDuplicatesFeatureGenerator...
        Fitting TextNgramFeatureGenerator...
            Fitting CountVectorizer for text features: ['street']
            CountVectorizer fit with vocabulary size = 2
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Unused Original Features (Count: 1): ['trans_num']
        These features were not used to generate any of the output features. Add a feature generator compatible with these features to utilize them.
        Features can also be unused if they carry very little information, such as being categorical but having almost entirely unique values or being duplicates of other features.
        These features do not need to be present at inference time.
        ('object', []) : 1 | ['trans_num']
    Types of features in original data (raw dtype, special dtypes):
        ('datetime', [])                   : 1 | ['trans_date_trans_time']
        ('float', [])                      : 6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                        : 3 | ['zip', 'city_pop', 'unix_time']
        ('object', [])                     : 8 | ['merchant', 'category', 'first', 'last', 'gender', ...]
        ('object', ['datetime_as_object']) : 1 | ['dob']
        ('object', ['text'])               : 1 | ['street']
    Types of features in processed data (raw dtype, special dtypes):
        ('category', [])                    :  7 | ['merchant', 'category', 'first', 'last', 'city', ...]
        ('category', ['text_as_category'])  :  1 | ['street']
        ('float', [])                       :  6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                         :  3 | ['zip', 'city_pop', 'unix_time']
        ('int', ['binned', 'text_special']) :  8 | ['street.char_count', 'street.word_count', 'street.capital_ratio', 'street.lower_ratio', 'street.digit_ratio', ...]
        ('int', ['bool'])                   :  1 | ['gender']
        ('int', ['datetime_as_int'])        : 10 | ['trans_date_trans_time', 'trans_date_trans_time.year', 'trans_date_trans_time.month', 'trans_date_trans_time.day', 'trans_date_trans_time.dayofweek', ...]
        ('int', ['text_ngram'])             :  1 | ['__nlp__.suite']
    1.0s = Fit runtime
    20 features in original data used to generate 37 features in processed data.
    Train Data (Processed) Memory Usage: 1.25 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 1.03s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36aff56790>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8563   = Validation score   (accuracy)
    0.0s     = Training   runtime
    0.04s    = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36aff56790>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8952   = Validation score   (accuracy)
    0.0s     = Training   runtime
    0.06s    = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9612   = Validation score   (accuracy)
    1.91s    = Training   runtime
    0.16s    = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9712   = Validation score   (accuracy)
    1.72s    = Training   runtime
    0.06s    = Validation runtime
Fitting model: RandomForestGini_BAG_L1 ...
    0.9639   = Validation score   (accuracy)
    0.42s    = Training   runtime
    0.16s    = Validation runtime
Fitting model: RandomForestEntr_BAG_L1 ...
    0.9649   = Validation score   (accuracy)
    0.53s    = Training   runtime
    0.16s    = Validation runtime
Fitting model: CatBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9879   = Validation score   (accuracy)
    10.01s   = Training   runtime
    0.1s     = Validation runtime
Fitting model: ExtraTreesGini_BAG_L1 ...
    0.9449   = Validation score   (accuracy)
    0.32s    = Training   runtime
    0.18s    = Validation runtime
Fitting model: ExtraTreesEntr_BAG_L1 ...
    0.9446   = Validation score   (accuracy)
    0.33s    = Training   runtime
    0.17s    = Validation runtime
Fitting model: NeuralNetFastAI_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9308   = Validation score   (accuracy)
    11.8s    = Training   runtime
    0.17s    = Validation runtime
Fitting model: XGBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9753   = Validation score   (accuracy)
    4.25s    = Training   runtime
    0.09s    = Validation runtime
Fitting model: NeuralNetTorch_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.906    = Validation score   (accuracy)
    30.02s   = Training   runtime
    0.14s    = Validation runtime
Fitting model: LightGBMLarge_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9635   = Validation score   (accuracy)
    3.54s    = Training   runtime
    0.07s    = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.9879   = Validation score   (accuracy)
    1.65s    = Training   runtime
    0.01s    = Validation runtime
AutoGluon training complete, total runtime = 76.0s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240125_014153/")

Autogluon(0.6 / 0.3)

# Experiment "Autogluon(0.6 / 0.3)": sample via throw(..., 0.6), then split with 0.3.
# NOTE(review): presumably 0.6 is the fraud ratio and 0.3 the test-side setting —
# confirm against the definitions of throw / split_dataframe.
df = throw(fraudTrain, 0.6)
df_tr, df_tst = split_dataframe(df, 0.3)

# Wrap both partitions as AutoGluon tabular datasets.
tr, tst = TabularDataset(df_tr), TabularDataset(df_tst)

# Build a predictor targeting the "is_fraud" column and train it with the
# best_quality preset.
predictr = TabularPredictor(label="is_fraud")
predictr.fit(train_data=tr, presets='best_quality')

# Predict on the test frame and score the predictions against its labels.
yhat = predictr.predict(tst)
y = tst["is_fraud"]

result12 = evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240125_014310/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240125_014310/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   608.99 GB / 982.82 GB (62.0%)
Train Data Rows:    7007
Train Data Columns: 21
Label Column: is_fraud
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [1, 0]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    22693.55 MB
    Train Data (Original)  Memory Usage: 5.95 MB (0.0% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
            Note: Converting 1 features to boolean dtype as they only contain 2 unique values.
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
        Fitting CategoryFeatureGenerator...
            Fitting CategoryMemoryMinimizeFeatureGenerator...
        Fitting DatetimeFeatureGenerator...
        Fitting TextSpecialFeatureGenerator...
            Fitting BinnedFeatureGenerator...
            Fitting DropDuplicatesFeatureGenerator...
        Fitting TextNgramFeatureGenerator...
            Fitting CountVectorizer for text features: ['street']
            CountVectorizer fit with vocabulary size = 2
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Unused Original Features (Count: 1): ['trans_num']
        These features were not used to generate any of the output features. Add a feature generator compatible with these features to utilize them.
        Features can also be unused if they carry very little information, such as being categorical but having almost entirely unique values or being duplicates of other features.
        These features do not need to be present at inference time.
        ('object', []) : 1 | ['trans_num']
    Types of features in original data (raw dtype, special dtypes):
        ('datetime', [])                   : 1 | ['trans_date_trans_time']
        ('float', [])                      : 6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                        : 3 | ['zip', 'city_pop', 'unix_time']
        ('object', [])                     : 8 | ['merchant', 'category', 'first', 'last', 'gender', ...]
        ('object', ['datetime_as_object']) : 1 | ['dob']
        ('object', ['text'])               : 1 | ['street']
    Types of features in processed data (raw dtype, special dtypes):
        ('category', [])                    :  7 | ['merchant', 'category', 'first', 'last', 'city', ...]
        ('category', ['text_as_category'])  :  1 | ['street']
        ('float', [])                       :  6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                         :  3 | ['zip', 'city_pop', 'unix_time']
        ('int', ['binned', 'text_special']) :  8 | ['street.char_count', 'street.word_count', 'street.capital_ratio', 'street.lower_ratio', 'street.digit_ratio', ...]
        ('int', ['bool'])                   :  1 | ['gender']
        ('int', ['datetime_as_int'])        : 10 | ['trans_date_trans_time', 'trans_date_trans_time.year', 'trans_date_trans_time.month', 'trans_date_trans_time.day', 'trans_date_trans_time.dayofweek', ...]
        ('int', ['text_ngram'])             :  1 | ['__nlp__.suite']
    0.9s = Fit runtime
    20 features in original data used to generate 37 features in processed data.
    Train Data (Processed) Memory Usage: 1.25 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.89s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79ff280>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8453   = Validation score   (accuracy)
    0.0s     = Training   runtime
    0.05s    = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79fff70>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8786   = Validation score   (accuracy)
    0.01s    = Training   runtime
    0.04s    = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9585   = Validation score   (accuracy)
    2.52s    = Training   runtime
    0.18s    = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9646   = Validation score   (accuracy)
    1.37s    = Training   runtime
    0.05s    = Validation runtime
Fitting model: RandomForestGini_BAG_L1 ...
    0.962    = Validation score   (accuracy)
    0.52s    = Training   runtime
    0.16s    = Validation runtime
Fitting model: RandomForestEntr_BAG_L1 ...
    0.9589   = Validation score   (accuracy)
    0.48s    = Training   runtime
    0.16s    = Validation runtime
Fitting model: CatBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9889   = Validation score   (accuracy)
    12.33s   = Training   runtime
    0.11s    = Validation runtime
Fitting model: ExtraTreesGini_BAG_L1 ...
    0.9513   = Validation score   (accuracy)
    0.31s    = Training   runtime
    0.17s    = Validation runtime
Fitting model: ExtraTreesEntr_BAG_L1 ...
    0.9471   = Validation score   (accuracy)
    0.31s    = Training   runtime
    0.17s    = Validation runtime
Fitting model: NeuralNetFastAI_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9245   = Validation score   (accuracy)
    12.24s   = Training   runtime
    0.16s    = Validation runtime
Fitting model: XGBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9765   = Validation score   (accuracy)
    3.44s    = Training   runtime
    0.09s    = Validation runtime
Fitting model: NeuralNetTorch_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.8918   = Validation score   (accuracy)
    27.16s   = Training   runtime
    0.15s    = Validation runtime
Fitting model: LightGBMLarge_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9588   = Validation score   (accuracy)
    4.3s     = Training   runtime
    0.1s     = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.989    = Validation score   (accuracy)
    1.66s    = Training   runtime
    0.01s    = Validation runtime
AutoGluon training complete, total runtime = 76.15s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240125_014310/")
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79fedc0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'

Autogluon(0.6 / 0.4)

# Experiment "Autogluon(0.6 / 0.4)": sample via throw(..., 0.6), then split with 0.4.
# NOTE(review): presumably 0.6 is the fraud ratio and 0.4 the test-side setting —
# confirm against the definitions of throw / split_dataframe.
df = throw(fraudTrain, 0.6)
df_tr, df_tst = split_dataframe(df, 0.4)

# Wrap both partitions as AutoGluon tabular datasets.
tr, tst = TabularDataset(df_tr), TabularDataset(df_tst)

# Build a predictor targeting the "is_fraud" column and train it with the
# best_quality preset.
predictr = TabularPredictor(label="is_fraud")
predictr.fit(train_data=tr, presets='best_quality')

# Predict on the test frame and score the predictions against its labels.
yhat = predictr.predict(tst)
y = tst["is_fraud"]

result13 = evaluation(y, yhat)
No path specified. Models will be saved in: "AutogluonModels/ag-20240125_014426/"
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240125_014426/"
AutoGluon Version:  0.8.2
Python Version:     3.8.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #38~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Nov  2 18:01:13 UTC 2
Disk Space Avail:   608.74 GB / 982.82 GB (61.9%)
Train Data Rows:    7007
Train Data Columns: 21
Label Column: is_fraud
Preprocessing data ...
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
    2 unique label values:  [1, 0]
    If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])
Selected class <--> label mapping:  class 1 = 1, class 0 = 0
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
    Available Memory:                    22692.89 MB
    Train Data (Original)  Memory Usage: 5.95 MB (0.0% of available memory)
    Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
    Stage 1 Generators:
        Fitting AsTypeFeatureGenerator...
            Note: Converting 1 features to boolean dtype as they only contain 2 unique values.
    Stage 2 Generators:
        Fitting FillNaFeatureGenerator...
    Stage 3 Generators:
        Fitting IdentityFeatureGenerator...
        Fitting CategoryFeatureGenerator...
            Fitting CategoryMemoryMinimizeFeatureGenerator...
        Fitting DatetimeFeatureGenerator...
        Fitting TextSpecialFeatureGenerator...
            Fitting BinnedFeatureGenerator...
            Fitting DropDuplicatesFeatureGenerator...
        Fitting TextNgramFeatureGenerator...
            Fitting CountVectorizer for text features: ['street']
            CountVectorizer fit with vocabulary size = 2
    Stage 4 Generators:
        Fitting DropUniqueFeatureGenerator...
    Stage 5 Generators:
        Fitting DropDuplicatesFeatureGenerator...
    Unused Original Features (Count: 1): ['trans_num']
        These features were not used to generate any of the output features. Add a feature generator compatible with these features to utilize them.
        Features can also be unused if they carry very little information, such as being categorical but having almost entirely unique values or being duplicates of other features.
        These features do not need to be present at inference time.
        ('object', []) : 1 | ['trans_num']
    Types of features in original data (raw dtype, special dtypes):
        ('datetime', [])                   : 1 | ['trans_date_trans_time']
        ('float', [])                      : 6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                        : 3 | ['zip', 'city_pop', 'unix_time']
        ('object', [])                     : 8 | ['merchant', 'category', 'first', 'last', 'gender', ...]
        ('object', ['datetime_as_object']) : 1 | ['dob']
        ('object', ['text'])               : 1 | ['street']
    Types of features in processed data (raw dtype, special dtypes):
        ('category', [])                    :  7 | ['merchant', 'category', 'first', 'last', 'city', ...]
        ('category', ['text_as_category'])  :  1 | ['street']
        ('float', [])                       :  6 | ['cc_num', 'amt', 'lat', 'long', 'merch_lat', ...]
        ('int', [])                         :  3 | ['zip', 'city_pop', 'unix_time']
        ('int', ['binned', 'text_special']) :  8 | ['street.char_count', 'street.word_count', 'street.capital_ratio', 'street.lower_ratio', 'street.digit_ratio', ...]
        ('int', ['bool'])                   :  1 | ['gender']
        ('int', ['datetime_as_int'])        : 10 | ['trans_date_trans_time', 'trans_date_trans_time.year', 'trans_date_trans_time.month', 'trans_date_trans_time.day', 'trans_date_trans_time.dayofweek', ...]
        ('int', ['text_ngram'])             :  1 | ['__nlp__.suite']
    0.8s = Fit runtime
    20 features in original data used to generate 37 features in processed data.
    Train Data (Processed) Memory Usage: 1.25 MB (0.0% of available memory)
Data preprocessing and feature engineering runtime = 0.77s ...
AutoGluon will gauge predictive performance using evaluation metric: 'accuracy'
    To change this, specify the eval_metric parameter of Predictor()
User-specified model hyperparameters to be fit:
{
    'NN_TORCH': {},
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}, {}, 'GBMLarge'],
    'CAT': {},
    'XGB': {},
    'FASTAI': {},
    'RF': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'XT': [{'criterion': 'gini', 'ag_args': {'name_suffix': 'Gini', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'entropy', 'ag_args': {'name_suffix': 'Entr', 'problem_types': ['binary', 'multiclass']}}, {'criterion': 'squared_error', 'ag_args': {'name_suffix': 'MSE', 'problem_types': ['regression', 'quantile']}}],
    'KNN': [{'weights': 'uniform', 'ag_args': {'name_suffix': 'Unif'}}, {'weights': 'distance', 'ag_args': {'name_suffix': 'Dist'}}],
}
Fitting 13 L1 models ...
Fitting model: KNeighborsUnif_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79fe310>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.84     = Validation score   (accuracy)
    0.01s    = Training   runtime
    0.04s    = Validation runtime
Fitting model: KNeighborsDist_BAG_L1 ...
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f36a79feee0>
Traceback (most recent call last):
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/coco/anaconda3/envs/py38/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
    0.8818   = Validation score   (accuracy)
    0.0s     = Training   runtime
    0.04s    = Validation runtime
Fitting model: LightGBMXT_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9575   = Validation score   (accuracy)
    1.64s    = Training   runtime
    0.12s    = Validation runtime
Fitting model: LightGBM_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9636   = Validation score   (accuracy)
    1.41s    = Training   runtime
    0.05s    = Validation runtime
Fitting model: RandomForestGini_BAG_L1 ...
    0.9595   = Validation score   (accuracy)
    0.54s    = Training   runtime
    0.16s    = Validation runtime
Fitting model: RandomForestEntr_BAG_L1 ...
    0.9593   = Validation score   (accuracy)
    0.5s     = Training   runtime
    0.16s    = Validation runtime
Fitting model: CatBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9873   = Validation score   (accuracy)
    11.4s    = Training   runtime
    0.1s     = Validation runtime
Fitting model: ExtraTreesGini_BAG_L1 ...
    0.9602   = Validation score   (accuracy)
    0.33s    = Training   runtime
    0.18s    = Validation runtime
Fitting model: ExtraTreesEntr_BAG_L1 ...
    0.9589   = Validation score   (accuracy)
    0.32s    = Training   runtime
    0.18s    = Validation runtime
Fitting model: NeuralNetFastAI_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9235   = Validation score   (accuracy)
    12.32s   = Training   runtime
    0.17s    = Validation runtime
Fitting model: XGBoost_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.9747   = Validation score   (accuracy)
    4.39s    = Training   runtime
    0.12s    = Validation runtime
Fitting model: NeuralNetTorch_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.8938   = Validation score   (accuracy)
    33.16s   = Training   runtime
    0.14s    = Validation runtime
Fitting model: LightGBMLarge_BAG_L1 ...
    Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy
    0.959    = Validation score   (accuracy)
    4.41s    = Training   runtime
    0.12s    = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
    0.9873   = Validation score   (accuracy)
    1.59s    = Training   runtime
    0.01s    = Validation runtime
AutoGluon training complete, total runtime = 81.47s ... Best model: "WeightedEnsemble_L2"
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20240125_014426/")